#include <stdio.h>
#include <stdlib.h>
#include <string.h>



// Number of bases kept on each side of a fold point when looking for hairpins.
#define MATCH_NEEDED_IN_2ND 3

// Probe length in bases. Compile-time constant: recompile when changed.
#define LEN_MER 8

int SecondStruct(const char *str);

int CrossHyb(const char *str1, const char *str2, int overlap_length);
int SimpleMatch(const char *str1, const char *str2);
char FirstN(const char *str1, const char *str2, int N);

// Extra score added by CrossHyb for every matching g/g or c/c pair (-gc_add).
int GC_ADDITION = 1;
// Required number of G or C bases in a probe (-gc).
int NUM_GC = 4;

// Fold score at which a probe is rejected for secondary structure (-2).
// The default equals three consecutive matches: 1+2+4.
int SCORE_NEEDED_IN_2ND = 7;
// Optional pair of bases treated as equivalent in compatibility checks (-eq).
char eq1, eq2;

// Result stream and log stream; both may be redirected with -o.
FILE *fp = stdout;
FILE *fplog = stderr;

main(int argc, char **argv) 
{ 

int ii, jj, comp_score, s; 

int MM[LEN_MER]; 

char line[256], str[LEN_MER+l]; 

int gcsum, pass_gc, total_probes, failed_m, failed_ch, failed_sm; 
char convert[4]; // 0123 to atcg convertion. 
char * compatible; 

int max_prb, cnt_prb; 
char **probe; 

int max_snd, cnt_snd; 
char **sndstr, 
int *snd_matchcnt; 

int SIMPLE_CUTOFF = 5; // reject if this many bps match to each other, 

// no matter where they are located, 
int CROSSHYBCUTOFF = 9; // 1+2+4 + 2 
int CROSSHYB_OVERLAP = 5; 

int FIRST_N = 4; 



eql=eq2 = "; 
if(argc = 1) 

{ 
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fprintf(stderr, 

"Usage: %s -o output_file[stdout]\n", 

argv[0]); 
fprintf(stderr, 

"\t\t-gc number_of_GCs_in_probe[%d]\n", 

NUM.GC); 
fprintf(stdeir, 

"\t\t-2 secondary_structure_rej ect(including this value)[%d]\n", 
SCORE_NEEDED_IN_2ND); 

fprintf(stderr, 

"\t\t-ch crosshyb_reject(including this value)[%d]\n", 

CROSSHYB_CUTOFF); 
fprintf(stderr, 

"\t\t-sm simple_match_reject(including this value)[%d]\n", 

SIMPLE_CUTOFF); 

fprintf(stderr, 

"\t\t-olcrosshyb_overlapJength[%d]\n H , 

CROSSHYB_OVERLAP); 

fprintf(stderr, 

"\t\t-eq add'l_equiv_bp_in_compat_checking[%c%c] (e.g., -eq gt)\n", 
eql,eq2); 

fprintf(stdeiT, "\t\t-fn first_N_length[%d]\n", FIRST_N); 
fprintf(stderr, "\t\t-gc_add GCjidd4j3enalty[%d]\n", GC_ADDITION); 

exit(O); 

} 

// parse input parameters, 
u-l; 

while(ii < argc) 
{ 

if(strcmp(argv[ii], "-gc") = 0) 
sscanf(argv[ii+l], "%d", &NUM_GC); 

else iftstrcmp(argv[ii], n -2") = 0) 
sscanf(argv[ii+l], "%d", &SCORE_NEEDED_IN_2ND); 

else if(strcmp(argv[ii], "-ch") = 0) 
sscanf(argv[ii+l], "%d M , &CROSSHYB_CUTOFF); 

else if(strcmp(argv[ii], "-ol") = 0) 
sscanf(argv[ii+l], "%d", &CROSSHYB_OVERLAP); 

else if(stranp(argv[ii], "-eq") — 0) 
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if(strlen(argv[ii+l]) = 2 1| 
(strlen(argv[ii+l]) — 3 && argv[ii+l][2] — V)> 

{ 

eql = argv[ii+l][0]; 
eq2 = argv[u+l][l]; 

} 

else 

* fprintf(stderr, "\nERROR: Invalid string after -eq flag.W); 
exit(l); 

} 

} 

else if(strcmp(argv[ii], "-o") == 0) 

{ if((fp = fopen(argv[ii+l], "w")) = NULL) 

^fprintf(stderr, "Can't open file %s to writeAn", argv[ii+l]); 
exit(l); 

} 

char logname[128]; 

sprintf(logname, "%s.log", argv[ii+l]); 

if((fplog = fopenOogname, "w")) «— NULL) 

^ fprintf(stderr, "failed creating log. stderr used.\n"); 
fplog = stderr, 

} 

} 

else if(strcmp(argv[ii], "-fn") — 0) 

* sscanf(argv[ii+l],"%d",&FIRST_N); 
} 

else if(strcmp(argv[ii], "-sin") = 0) 

{ sscarif(argv[ii+l]," 0 /od",&SIMPLE_CUTOFF); 
} 

else if(strcmp(atgv[U], "-gc_add") — 0) 

^ sscanf(argv[ii+l], "%d", &GC_ADDITION); 
} 
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else 

fprintf(stderr, "Unknow flag %s\n M , argv[ii]); 
exit(l); 

} 

ii+=2; 

} 

max_prb = 30000; 
probe = new char* [max_prb]; 
for(ii - 0; ii < max_prb; ii++) 
probepi] = new char [LEN_MER+1]; 
cnt_prb - 0; 

max_snd = 5000; 
sndstr = new char* [max_snd]; 
for(ii = 0; ii < max_snd; ii++) 
sndstrpi] = new char [LEN_MER+1]; 
snd_matchcnt = new int [max_snd]; 
cnt_snd = 0; 

// build an array of probes. Each probe is of length LEN_MER, 
// of which *NUM_GC are Gs or Cs. 

convert[0] = 'a'; 

convert[l] = , t'; 
convert[2] = 'c'; 

convert[3] = 'g'; 
total_probes = 0; 

passage = 0; // number of probes pass GC test. 
for(MM[0] = 0; MM[0] < 4; MM[0]++) 
for(MM[l] = 0; MM[1] < 4; MM[1]++) 
for(MM[2] = 0; MM[2] < 4; MM[2]++) 
for(MM[3] = 0; MM[3] < 4; MM[3]++) 

for(MM[4] = 0; MM[4] < 4; MM[4]++) //*things to pay attention to. 
for(MM[5] = 0; MM[5] < 4; MM[5]++) //'things to pay attention to. 
for(MM[6] = 0; MM[6] < 4; MM[6]++) //'things to pay attention to. 
for(MM[7] = 0; MM[7] < 4; MM[7]++) //'things to pay attention to. 

{ 

total_probes++; 
gesum = 0; 
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for<jj = 0; jj < LEN.MER; 
{ 

str[jj] = convert [MM[jj]]; 
if(str[ij] = , c'||str[ij] = , g') 
gcsum++; 

} 

str[LEN_MER] = W; 

// check its GC contents and secondary structure. 

if(gcsum = NUM_GC) 

{ 

pass _gc++; 

fprintf(fylog, "pass GCtest: %s\n", str); 

if(!SecondStruct(str)) 
{ 

strcpy(probe[cntjprb], str); 
if(++cnt_prb = max_prb) 

{ 

// should relocate memory. 

// To simplefy the program, let's just give an error msg. 

fprintf(stderr, "ERROR: Probe array is too small. cnt_prb is %d\n", cnt_prb); 

exit(l); 

} 

} 

else 

{ 

// record the rejected string 
strcpy(sndstr[cnt_snd], str); 
if(-H-cnt_snd = max_snd) 

^ fprintf(stderr, "ERROR: Secondary Structure array is too small. cnt_snd = %d\n", 

cnt_snd); 

exit(l); 

} 

} 

} 

} 

fprintf(fp, "\n%d mer probe selection\n", LEN_MER); 
fprintf(Q), "Number of GCs in the probes: %d\n", NUM_GC); 
fij)rintf(rp, "Score to reject as secondary structure: %d\n", 

SCORE_NEEDED_IN_2ND); 
fprintf(fp, "Score to reject as incompatible: %dAn", CROSSHYB.CUTOFF); 
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(print**. "Compatible test overlap: %d\n", CROSSHYB.O^RLAP); 
fprintflfp, "Additional equivalent base-pair in compatibility checking: %c /oc\n , 

fprmt^/Smiple match cutoff value(including): %d\n", SIMPLE.CUTOFF); 
fbrintfffb "First N valuefincluding): %d\n'\ FIRST_N); 
IZm, "Actional penalty for G or C: %d\n", GC_ADDITION); 
fprintf(fp, "\n\n"); 

fprintf(fp "Total possible %dmers: %d\n", LEN.MER, total_probes); 

fprintf(fp; "Number passed GC.test : %d\n", pass_gc); 

fprintf(fp, "Number passed secondary structure test : %d\n , cnt_prb), 
// for(ii = 0;ii<cnt_snd;ii++) 
// fprintf(fp,"%s\n",sndstr[ii]); 



// From the set (call it setl) of probes which passed GC and 2nd structure 
// tests, choose a probe into the final set(set2). Then compare this 
// probe against all the probes left in setl and throught out the ones 
// that may crosshyb to this probe. From what's left in setl, choose 
// another probe and compary it to the rest of setl ... 

compatible = new char [cnt_prb]; 
for(ii = 0; ii < cnt _prb; ii++) 
{ 

compatible[ii] = T; 

} 

// Compatibility check #1 : Use weighted scores to penalize neighboring matches. 
// first match_scorc = 1 ; 

// if prev pair is a match, current_match_score = P rev_match_score 2. 
ii = 0; 

failed_ch = 0; 
while(ii < cnt_prb) 

^ forQj - ii+1 ; jj < cnt_prb; jj++) 

{ 

compatiblefjj] = 'F; 
failed_ch++; 

fprintf(fplog, "Rejected(%d) %s in slide test for /os\n , 
s, probeQj], probe[ii]); 



} 
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ii++; . 
while(ii < cnt_prb && compatiblepi] = 'F) 

ii++; 

fprintf(fp, "Number of probes passed compatibility test: %d\n M , 
cnt_prb - failed_ch); 

// Compatibility check #2: Use unweighted score: count inconsecutive matches 

// find the first 'passed' probe. 
ii = 0; 

while(ii < cnt_prb && compatible[ii] = T) 
ii++; 

failed_sm = 0; 
while(ii<cnt_prb) 

forQj = it+1; jj < cnt_prb; 

iffcompatiblepj] — T && ™™™ 
(s=SimpleMatch(probe[ii],probe[ij])) >= SIMPLE_CUTOFF) 

{ 

compatibleQj] = *F; „ 
fprintf(frlog, "Rejected(%d) %s in simple_match test for /os\n , 

s, probeDj], probe[ii]); 
failed_sm-H-; 

> 

} 

ii++; 

while(ii < cnt_prb && compatiblepi] = V) 
ii++; 

fprintf(rp, "Number of probes passed simple match test: %d\n", 
cnt_prb - failed_ch - failed_sm); 

// CompatibiUty check #3: if the first N bases match ANYWHERE in another probe. 

// find the first 'passed' probe. 
ii = 0; 

while(ii < cnt_prb && compatiblepi] = T) 
ii++; 

failed_m = 0; 
while(ii < cnt_prb) 
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{ 

for(ij - ii+1 ; jj < cnt_prb; jj++) 
{ 

if(compatible[ij] = T && 
FirstN(probe[ii], probeQj], FIRST_N) = T) 

{ 

compatible!]]] = T'; 
foiled_m++; 

fprintf(fplog, "Rejected %s in FIRSTN test for %s\n", 
probe[jj], probelii]); 

} 

} 

ii++; 

while(ii < cntjprb && compatiblepi] = 'F') 
u++; 

} 

fprintf(fp, "Number of probes passed FIRSTN compatibility test: %d\n", 
cnt_prb - failed_ch - failed_sm - failed_m); 

// output. 

fprintf(fp, M \nSelected probes are: \n"); 

for(ii = 0; ii < cnt_prb; ii++) 

{ 

if(compatible[ii] = T) 
{ 

fprintf(fp, "%s \n", probepi]); 

} 

} 

} 



// Check if 'str 1 contains a secondary structure. That is, if there is a 
// consecutive 3 bases that matches when 'str* is folded. 
// return 1 if found secondary structure, 0 otherwise. 

int Seconds truct(const char *str) 
{ 

int ii, jj, kk, 11; 
int sum, score[32]; 
charprev match; 

cteW; p|Q -J6H 



char complement[256]; 
complement['a'] = T; 
complement[t'] = 'a'; 
complementfc'] = 'g'; 
complementrgT = 'c'; 

11 = strlen(str); 

compl = new char [11+1]; 

for(ii = 0;ii<U;ii++) 

complfii] = complement[str[ii]]; 

} 

for(ii = MATCH_NEEDED_IN_2ND; ii < 11 - MATCH_NEEDED_IN_2ND; ii++) 
{ 

prev_match = 'F; 
sum - 0; 

forQj = 0;jj<ii;jj++) 
{ 

score [jj] = 0; 
kk = ii*2-jj; 
if(kk < 11) 

{ 

if(str[jj] = compl[kk]) 
{ 

if(prev_match = T) 
{ 

scoreQj] - scoreQj-1] * 2; 

} 

else 
{ 

scoreQj] = 1; 
prev_match = T; 

} 

} 

else 
{ 

prev match = T; 

} 

} 

sum += scoreQj]; 

} 

// fprintf(stderr, "2' sum = %d\n", sum); 
if(sum >= SCORE_NEEDED_IN_2ND) 
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( 

delete Q compl; 

return 1 ; // Found a 2nd structure. 

} 

} 

for(ii » MATCH_NEEDED_IN_2ND - 1; ii < U - MATCH_NEEDED_IN_2ND; ii++) 
{ 

prev_match = T; 
sum = 0; 

for(ij = 0;jj<=ii;jj++) 

q score[jj] = 0; 

n kk = ii*2+l -jj; 

5 ifCkk < 11) 

y { 

Ly if(str[jj] = compl[kk]) 

m { 

m if(prev_match = T) 

{ 

scoreQj] = scoreQj- 1]*2; 

} 

else 

{ 

scorefjj] = 1; 
prev match = T; 

} 

} 

else 
{ 

prev_match = 'F; 

} 

} 

sum += scoreQj]; 

- } 

// fprintfi[stderr, "2" sum - %d\n'\ sum); 
if(sum >= SCORE_NEEDED_IN_2ND) 

{ 

delete Q compl; 

return 1 ; // Found a 2nd structure. 

} 



} 

delete Q compl; 
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return 0; // No 2nd structure. 

} 

// check if strl and str2 can hybridizy together. 
// return the max of match scores. 
// Assume strlen(strl) = strlen(str2). 

int CrossHyb(const char *strl, const char *str2, int overlap) 
{ 

int ii, jj, len, sum, score, prev_score, max_sum, numGC; 
char prev_match; 

len = strlen(strl); 
max_sum = 0; 

fprintf(fplog, "Sliding test between %s and %s\n", strl, str2); 

for(ii = overlap-len; ii <= len-overlap; ii++) 
{ 

numGC = 0; 
sum = 0; 

score = prev_score = 0; 
prevjnatch = 'F; 
fprintf(fplog, "Compare "); 
for(jj = ii; jj < len && jj - ii < len; jj++) 
{ 

if(jj>=0&&jj-ii>=0) 



fprintf(fplog, "(%c,%c) ", strlQj], str2Qj-ii]); 
if((strlQj] = str2[jj-ii]) || 
(strlQj] — eql && str2[jj-ii] — eq2) || 
(strl Qj] — eq2 && str2[jj-ii] — eql)) 

{ if(((strl Qj]|32) — 'g' && (str2[jj-ii]|32) = 'g') || 
((strl Qj]|32) — 'C && (str2Qj-ii]|32) = 'c')) 
numGC++; 

if(prev_match = T) 
{ 

score = prev_score*2; 



else 
{ 



score = 1; 




prevjnatch = T; 

} 

} 

else 
{ 

score = 0; 
prevjnatch = 'F; 

} 

sum += score; 
prev_score = score; 

} 

} 

fprintf(fplog, "Score=%d\n", sum + numGC*GC_ADDITION); 

if(sum + numGC * GC_ADDITION > max_sum) 
max sum = sum + numGC * GC_ADDITION ; 

} 

fprintf(fplog, "Max score is %d\n", max_sum); 
return max_sum; 



// Compare 2 strings base to base, 0 to 0, 1 to 1..., no sliding. 

// return number of matches. 

// Assume strlen(strl) = strlen(str2). 

int SimpleMatch(const char *strl, const char *str2) 
{ 

int ii, sum; 
sum = 0; 

for(ii = 0; ii < strlen(strl); ii++) 
{ 

if((strl[ii] = str2[ii]) || 
(strl [ii] = eql && str2[ii] — eq2) || 
(strl [ii] = eq2 && str2[ii] — eql)) 

{ 

sum++; 

} 

} 

return sum; 
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// Check if the first N bases of the two probes are identical, 
char FirstN(const char *strl, const char *str2, int N) 

{ 

int ii; 

char match = T; 

if(N>strlen(strl)) 
return 'F; 

for(ii = 0; ii<N;u++) 
{ 

if(!((strl[ii] = str2[ii])|| 
(strl [ii] — eql && str2[ii] = eq2) || 
(strl [ii] — eq2 && str2[ii] — eql))) 

{ 

match = 'P; 
break; 

} 

} 

return match; 
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Synthesize the nucleotide A on a column 
and in parallel synthesize G, C and T 
each on their own oligo synthesis 
columns. 



After the second round of synthesis there are a
total of 16 di-nucleotide sequences. If the
columns were combined, mixed, aliquoted,
and followed by a third round of synthesis, a
total of 64 different tri-nucleotides would be
generated.



(1) :30HL00ll.LMD PKTl/TS Single beads 



Single beads 




Granularity 



( 1 ) : 3 O HL OOll 



. LMD PKT2 LOG Single beads



2 fH 



01 



RNA negative



io 



— i 1 — i i n 




_ RNA positive 



I xo- 



'lio 3 ' 



FAM oligo hybridization



ru 



(1>:30HL000S.LHD PHTt/fS Single b« 
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Starting Perturbagen Library in 
cell grown in dextrose = non 
expressing conditions 




Introduce ID Tagged Perturbagen library into 
cells and express the perturbagen 




Each cell expresses a
different perturbagen.
Most have no effect...
some kill
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Reisolate plasmid library after passage from induced 
and non induced grown cells. Keep libraries separate. 




T7 transcribe ID Tags in passaged 
"Dextrose" library and "Galactose" Library 
to generate mg quantities of ID Tags as 





FITC label Starting 
Library RNA 



Rhodamine label 
Passaged Library 





Combine and hybridize RNA from 
libraries to ID Tag Beads 




Sequence and clone Perturbagen 
DNA back into expression construct 
and validate effect 
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Use specific ID-Tag primer and a 
common vector primer juxtaposed 
to perturbagen to amplify 
corresponding perturbagen 



Sequence ID Tag and 
generate complement oligo 
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RT-PCR amplify RNA off beads 
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Sort single FITC+ beads 
into 96 well PCR Plate 
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FITC Fluorescence 



Sort all beads on the cell sorter to identify beads
labeled only with FITC RNA



